!pip install dask
!pip install missingno
!pip install hvplot
import os
from glob import glob
import numpy as np
import pandas as pd
from dask import bag, diagnostics
from urllib import request
import cv2
import missingno as msno
import hvplot.pandas # custom install
from matplotlib import pyplot as plt
%matplotlib inline
Requirement already satisfied: dask in /opt/conda/lib/python3.8/site-packages (2022.1.0) Requirement already satisfied: pyyaml>=5.3.1 in /opt/conda/lib/python3.8/site-packages (from dask) (5.4.1) Requirement already satisfied: partd>=0.3.10 in /opt/conda/lib/python3.8/site-packages (from dask) (1.2.0) Requirement already satisfied: fsspec>=0.6.0 in /opt/conda/lib/python3.8/site-packages (from dask) (2022.1.0) Requirement already satisfied: packaging>=20.0 in /opt/conda/lib/python3.8/site-packages (from dask) (20.9) Requirement already satisfied: cloudpickle>=1.1.1 in /opt/conda/lib/python3.8/site-packages (from dask) (2.0.0) Requirement already satisfied: toolz>=0.8.2 in /opt/conda/lib/python3.8/site-packages (from dask) (0.11.2) Requirement already satisfied: pyparsing>=2.0.2 in /opt/conda/lib/python3.8/site-packages (from packaging>=20.0->dask) (2.4.7) Requirement already satisfied: locket in /opt/conda/lib/python3.8/site-packages (from partd>=0.3.10->dask) (0.2.1) WARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. 
It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv Requirement already satisfied: missingno in /opt/conda/lib/python3.8/site-packages (0.5.0) Requirement already satisfied: scipy in /opt/conda/lib/python3.8/site-packages (from missingno) (1.6.3) Requirement already satisfied: seaborn in /opt/conda/lib/python3.8/site-packages (from missingno) (0.11.2) Requirement already satisfied: numpy in /opt/conda/lib/python3.8/site-packages (from missingno) (1.20.3) Requirement already satisfied: matplotlib in /opt/conda/lib/python3.8/site-packages (from missingno) (3.4.2) Requirement already satisfied: pillow>=6.2.0 in /opt/conda/lib/python3.8/site-packages (from matplotlib->missingno) (9.0.0) Requirement already satisfied: pyparsing>=2.2.1 in /opt/conda/lib/python3.8/site-packages (from matplotlib->missingno) (2.4.7) Requirement already satisfied: cycler>=0.10 in /opt/conda/lib/python3.8/site-packages (from matplotlib->missingno) (0.10.0) Requirement already satisfied: kiwisolver>=1.0.1 in /opt/conda/lib/python3.8/site-packages (from matplotlib->missingno) (1.3.1) Requirement already satisfied: python-dateutil>=2.7 in /opt/conda/lib/python3.8/site-packages (from matplotlib->missingno) (2.8.1) Requirement already satisfied: pandas>=0.23 in /opt/conda/lib/python3.8/site-packages (from seaborn->missingno) (1.1.4) Requirement already satisfied: six in /opt/conda/lib/python3.8/site-packages (from cycler>=0.10->matplotlib->missingno) (1.16.0) Requirement already satisfied: pytz>=2017.2 in /opt/conda/lib/python3.8/site-packages (from pandas>=0.23->seaborn->missingno) (2021.1) WARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. 
It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv Requirement already satisfied: hvplot in /opt/conda/lib/python3.8/site-packages (0.7.3) Requirement already satisfied: colorcet>=2 in /opt/conda/lib/python3.8/site-packages (from hvplot) (3.0.0) Requirement already satisfied: pandas in /opt/conda/lib/python3.8/site-packages (from hvplot) (1.1.4) Requirement already satisfied: bokeh>=1.0.0 in /opt/conda/lib/python3.8/site-packages (from hvplot) (2.4.2) Requirement already satisfied: numpy>=1.15 in /opt/conda/lib/python3.8/site-packages (from hvplot) (1.20.3) Requirement already satisfied: holoviews>=1.11.0 in /opt/conda/lib/python3.8/site-packages (from hvplot) (1.14.7) Requirement already satisfied: typing-extensions>=3.10.0 in /opt/conda/lib/python3.8/site-packages (from bokeh>=1.0.0->hvplot) (3.10.0.0) Requirement already satisfied: pillow>=7.1.0 in /opt/conda/lib/python3.8/site-packages (from bokeh>=1.0.0->hvplot) (9.0.0) Requirement already satisfied: PyYAML>=3.10 in /opt/conda/lib/python3.8/site-packages (from bokeh>=1.0.0->hvplot) (5.4.1) Requirement already satisfied: tornado>=5.1 in /opt/conda/lib/python3.8/site-packages (from bokeh>=1.0.0->hvplot) (6.1) Requirement already satisfied: Jinja2>=2.9 in /opt/conda/lib/python3.8/site-packages (from bokeh>=1.0.0->hvplot) (3.0.1) Requirement already satisfied: packaging>=16.8 in /opt/conda/lib/python3.8/site-packages (from bokeh>=1.0.0->hvplot) (20.9) Requirement already satisfied: param>=1.7.0 in /opt/conda/lib/python3.8/site-packages (from colorcet>=2->hvplot) (1.12.0) Requirement already satisfied: pyct>=0.4.4 in /opt/conda/lib/python3.8/site-packages (from colorcet>=2->hvplot) (0.4.8) Requirement already satisfied: pyviz-comms>=0.7.4 in /opt/conda/lib/python3.8/site-packages (from holoviews>=1.11.0->hvplot) (2.1.0) Requirement already satisfied: panel>=0.8.0 in /opt/conda/lib/python3.8/site-packages (from holoviews>=1.11.0->hvplot) (0.12.6) Requirement already satisfied: 
python-dateutil>=2.7.3 in /opt/conda/lib/python3.8/site-packages (from pandas->hvplot) (2.8.1) Requirement already satisfied: pytz>=2017.2 in /opt/conda/lib/python3.8/site-packages (from pandas->hvplot) (2021.1) Requirement already satisfied: MarkupSafe>=2.0 in /opt/conda/lib/python3.8/site-packages (from Jinja2>=2.9->bokeh>=1.0.0->hvplot) (2.0.1) Requirement already satisfied: pyparsing>=2.0.2 in /opt/conda/lib/python3.8/site-packages (from packaging>=16.8->bokeh>=1.0.0->hvplot) (2.4.7) Requirement already satisfied: tqdm>=4.48.0 in /opt/conda/lib/python3.8/site-packages (from panel>=0.8.0->holoviews>=1.11.0->hvplot) (4.53.0) Requirement already satisfied: bleach in /opt/conda/lib/python3.8/site-packages (from panel>=0.8.0->holoviews>=1.11.0->hvplot) (3.3.0) Requirement already satisfied: requests in /opt/conda/lib/python3.8/site-packages (from panel>=0.8.0->holoviews>=1.11.0->hvplot) (2.25.1) Requirement already satisfied: markdown in /opt/conda/lib/python3.8/site-packages (from panel>=0.8.0->holoviews>=1.11.0->hvplot) (3.3.4) Requirement already satisfied: six>=1.5 in /opt/conda/lib/python3.8/site-packages (from python-dateutil>=2.7.3->pandas->hvplot) (1.16.0) Requirement already satisfied: webencodings in /opt/conda/lib/python3.8/site-packages (from bleach->panel>=0.8.0->holoviews>=1.11.0->hvplot) (0.5.1) Requirement already satisfied: chardet<5,>=3.0.2 in /opt/conda/lib/python3.8/site-packages (from requests->panel>=0.8.0->holoviews>=1.11.0->hvplot) (4.0.0) Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.8/site-packages (from requests->panel>=0.8.0->holoviews>=1.11.0->hvplot) (2021.5.30) Requirement already satisfied: idna<3,>=2.5 in /opt/conda/lib/python3.8/site-packages (from requests->panel>=0.8.0->holoviews>=1.11.0->hvplot) (2.10) Requirement already satisfied: urllib3<1.27,>=1.21.1 in /opt/conda/lib/python3.8/site-packages (from requests->panel>=0.8.0->holoviews>=1.11.0->hvplot) (1.26.4) WARNING: Running pip as the 'root' user 
can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv
from IPython.display import display, Image
# Load the label table shipped with the dataset; the trailing bare
# expression makes the notebook render the DataFrame as the cell output.
data = pd.read_csv(filepath_or_buffer="imagewoof2/noisy_imagewoof.csv")
data
| | path | noisy_labels_0 | noisy_labels_1 | noisy_labels_5 | noisy_labels_25 | noisy_labels_50 | is_valid |
|---|---|---|---|---|---|---|---|
| 0 | train/n02115641/n02115641_3995.JPEG | n02115641 | n02115641 | n02115641 | n02115641 | n02115641 | False |
| 1 | train/n02115641/n02115641_843.JPEG | n02115641 | n02115641 | n02105641 | n02115641 | n02088364 | False |
| 2 | train/n02115641/n02115641_2953.JPEG | n02115641 | n02115641 | n02115641 | n02111889 | n02099601 | False |
| 3 | train/n02115641/n02115641_6458.JPEG | n02115641 | n02115641 | n02115641 | n02093754 | n02115641 | False |
| 4 | train/n02115641/n02115641_19414.JPEG | n02115641 | n02115641 | n02115641 | n02115641 | n02088364 | False |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 12949 | val/n02089973/n02089973_9351.JPEG | n02089973 | n02089973 | n02089973 | n02089973 | n02089973 | True |
| 12950 | val/n02089973/n02089973_1241.JPEG | n02089973 | n02089973 | n02089973 | n02089973 | n02089973 | True |
| 12951 | val/n02089973/n02089973_4702.JPEG | n02089973 | n02089973 | n02089973 | n02089973 | n02089973 | True |
| 12952 | val/n02089973/n02089973_1040.JPEG | n02089973 | n02089973 | n02089973 | n02089973 | n02089973 | True |
| 12953 | val/n02089973/n02089973_9591.JPEG | n02089973 | n02089973 | n02089973 | n02089973 | n02089973 | True |
12954 rows × 7 columns
# Render one JPEG from the train/n02086240 folder inline in the notebook.
display(Image(filename='imagewoof2/train/n02086240/ILSVRC2012_val_00000907.JPEG'))
# Bare expression: show the label table again as this cell's output.
data
| | path | noisy_labels_0 | noisy_labels_1 | noisy_labels_5 | noisy_labels_25 | noisy_labels_50 | is_valid |
|---|---|---|---|---|---|---|---|
| 0 | train/n02115641/n02115641_3995.JPEG | n02115641 | n02115641 | n02115641 | n02115641 | n02115641 | False |
| 1 | train/n02115641/n02115641_843.JPEG | n02115641 | n02115641 | n02105641 | n02115641 | n02088364 | False |
| 2 | train/n02115641/n02115641_2953.JPEG | n02115641 | n02115641 | n02115641 | n02111889 | n02099601 | False |
| 3 | train/n02115641/n02115641_6458.JPEG | n02115641 | n02115641 | n02115641 | n02093754 | n02115641 | False |
| 4 | train/n02115641/n02115641_19414.JPEG | n02115641 | n02115641 | n02115641 | n02115641 | n02088364 | False |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 12949 | val/n02089973/n02089973_9351.JPEG | n02089973 | n02089973 | n02089973 | n02089973 | n02089973 | True |
| 12950 | val/n02089973/n02089973_1241.JPEG | n02089973 | n02089973 | n02089973 | n02089973 | n02089973 | True |
| 12951 | val/n02089973/n02089973_4702.JPEG | n02089973 | n02089973 | n02089973 | n02089973 | n02089973 | True |
| 12952 | val/n02089973/n02089973_1040.JPEG | n02089973 | n02089973 | n02089973 | n02089973 | n02089973 | True |
| 12953 | val/n02089973/n02089973_9591.JPEG | n02089973 | n02089973 | n02089973 | n02089973 | n02089973 | True |
12954 rows × 7 columns
# Split the label table into training and validation subsets using the
# boolean `is_valid` flag column.
train = data[data["is_valid"] == False]
val = data[data["is_valid"] == True]

# Persist each subset without the DataFrame index so the CSVs contain
# only the original columns.
train.to_csv("train.csv", index=False)
val.to_csv("val.csv", index=False)

# Class counts for the `noisy_labels_50` column of the training subset.
# NOTE: this must be a plain Python expression; a leading `!` would make
# IPython hand the line to the system shell instead of evaluating it.
train.noisy_labels_50.value_counts()
n02115641 940 n02093754 927 n02096294 922 n02105641 919 n02088364 909 n02087394 909 n02086240 901 n02111889 896 n02099601 876 n02089973 826 Name: noisy_labels_50, dtype: int64
# Relative class frequencies of the `noisy_labels_0` column in the training subset.
train["noisy_labels_0"].value_counts(normalize=True)
n02093754 0.105152 n02099601 0.105152 n02096294 0.104488 n02087394 0.104377 n02086240 0.104266 n02115641 0.104155 n02088364 0.103269 n02105641 0.102825 n02111889 0.102050 n02089973 0.064266 Name: noisy_labels_0, dtype: float64
# Relative class frequencies of the `noisy_labels_0` column in the validation subset.
val["noisy_labels_0"].value_counts(normalize=True)
n02111889 0.109188 n02105641 0.107406 n02088364 0.106388 n02115641 0.104352 n02086240 0.104098 n02087394 0.103843 n02096294 0.103589 n02093754 0.102062 n02099601 0.102062 n02089973 0.057012 Name: noisy_labels_0, dtype: float64